
pacman::p_load(tidyverse,
               fs,
               fst,
               assertthat,
               tictoc)

# Quarter start dates from 2000-01-01 through 2022-12-31.
# NOTE(review): not referenced anywhere in the visible part of this script;
# kept in case other code relies on it.
training_qtrs <- seq(
  as.Date("2000-01-01"),
  as.Date("2022-12-31"),
  by = "quarter"
)

# Index every file in the input directory. From each file's base name, parse
#   * qtr         - the first run of eight digits, read as a year-month-day date
#   * rand_no_cid - the four digits immediately preceding ".fst"
# Parsing the base name (not the full path) keeps digits in a directory name
# from ever being picked up. The "." before "fst" is escaped so that it only
# matches a literal dot. `ymd()` is namespace-qualified because lubridate is
# not in the p_load() list above and is only attached by tidyverse >= 2.0.0.
file_info <- tibble(
  file_name = dir_ls("../../data_qtr_rand_no_cid_with_outcome/")
) %>%
  mutate(
    qtr = lubridate::ymd(str_extract(path_file(file_name), "[0-9]{8}")),
    rand_no_cid = str_extract(path_file(file_name), "[0-9]{4}(?=\\.fst)")
  )

# Keep only the files whose id is listed in RAND_NO_CID.
# NOTE(review): RAND_NO_CID is not defined in the visible code and must exist
# in the calling environment before this script runs.
training_files <- file_info %>%
  filter(rand_no_cid %in% RAND_NO_CID)

# Fail early instead of silently producing an empty output file downstream.
assert_that(
  nrow(training_files) > 0,
  msg = "No input files match RAND_NO_CID; nothing to load."
)


# Read the columns cid, qtr and t_default from every selected file and stack
# the per-file tables into one data frame. Each row is tagged with the
# rand_no_cid already parsed for its source file in `training_files`, so the
# file-name pattern is not duplicated here. The whole load is timed with
# tic()/toc().
tic(msg = "Loading tables")
df_raw <- map2(
  training_files$file_name,
  training_files$rand_no_cid,
  function(path, group_id) {
    # Progress goes to stderr so it does not mix with regular output.
    message("Reading ", path)
    read_fst(path, columns = c("cid", "qtr", "t_default")) %>%
      mutate(rand_no_cid = group_id)
  }
) %>%
  bind_rows()
toc()

# The loaded data is written out unchanged; a grouping step by (cid, qtr) was
# sketched here at some point but is disabled. Print a structural preview.
df <- df_raw
glimpse(df)

# SPECIAL_SUFFIX is prepended to the output file name.
# NOTE(review): it is not defined in the visible code and must exist in the
# calling environment. Require a single string so that exactly one output
# path is built.
assert_that(
  is.string(SPECIAL_SUFFIX),
  msg = "SPECIAL_SUFFIX must be a single character string."
)

file_name_out <- paste0("../../data/varia/", SPECIAL_SUFFIX, "cid_qtr.csv")
message("Writing ", file_name_out)
write_csv(df, file_name_out)
